R script to analyse the data on virome sequencing of single Belgian mosquitoes.
necessary_packages <- c('tidyverse', 'data.table', 'scales', 'knitr', 'grid', 'magrittr', 'DT', 'here', 'gridtext',
'ggpubr', 'ggthemes', 'reshape2', 'viridis', 'pals', 'circlize', 'scatterpie', 'ggh4x',
'vegan', 'phyloseq', 'metagenomeSeq', 'ComplexHeatmap', 'decontam')
lapply(necessary_packages, library, character.only = TRUE)
i_am("BelgianMosquitoVirome.R")
setwd(here::here())
#setwd("/Users/lander/OneDrive - KU Leuven/Documents/Manuscripts/Belgian mosquitoes (2022)/BMV-analysis/")
source("BMV_functions.R")## [1] "R version 4.1.3 (2022-03-10)"
## [1] "Running under: macOS Big Sur/Monterey 10.16"
## [1] "Biobase 2.52.0" "BiocGenerics 0.38.0" "circlize 0.4.15"
## [4] "ComplexHeatmap 2.8.0" "data.table 1.14.4" "decontam 1.12.0"
## [7] "dplyr 1.1.0" "DT 0.26" "forcats 1.0.0"
## [10] "ggh4x 0.2.4" "ggplot2 3.4.2" "ggpubr 0.6.0"
## [13] "ggthemes 4.2.4" "glmnet 4.1-4" "gridtext 0.1.5"
## [16] "here 1.0.1" "knitr 1.40" "lattice 0.20-45"
## [19] "limma 3.48.3" "lubridate 1.9.2" "magrittr 2.0.3"
## [22] "Matrix 1.4-0" "metagenomeSeq 1.34.0" "pals 1.7"
## [25] "permute 0.9-7" "phyloseq 1.36.0" "purrr 1.0.1"
## [28] "RColorBrewer 1.1-3" "readr 2.1.4" "reshape2 1.4.4"
## [31] "scales 1.2.1" "scatterpie 0.1.8" "stringr 1.5.0"
## [34] "tibble 3.2.1" "tidyr 1.3.0" "tidyverse 2.0.0"
## [37] "vegan 2.6-4" "viridis 0.6.2" "viridisLite 0.4.2"
seqnum_raw <- read.delim("data/sequencing-info.tsv", sep="\t", header=T)
seqnum_raw_summarized <- seqnum_raw %>% group_by(sample) %>%
summarise(sumseqs = sum(num_seqs))
seqnum <- read.delim("data/sequencing-info-hostout.tsv", sep="\t", header=T)
seqnum_summarized <-seqnum %>% group_by(sample) %>%
summarise(sumseqs_nonhost = sum(num_seqs))
seqnum_final <- merge(seqnum_raw_summarized, seqnum_summarized)
seqnum_final <- seqnum_final %>%
mutate(proportion_nonhost=sumseqs_nonhost/sumseqs*100)
datatable(seqnum_final)max(seqnum_final$sumseqs_nonhost)## [1] 5294850
min(seqnum_final$sumseqs_nonhost)## [1] 166768
mean(seqnum_final$sumseqs_nonhost)## [1] 1386043
max(seqnum_final$proportion_nonhost)## [1] 84.91982
min(seqnum_final$proportion_nonhost)## [1] 3.138335
mean(seqnum_final$proportion_nonhost)## [1] 25.64182
sum(seqnum_final$sumseqs)## [1] 1069326190
sum(seqnum_final$sumseqs_nonhost)## [1] 273050395
OTU <- read.table("data/abundance-table.txt", header=TRUE, row.names=1, sep="\t", dec=".")
names(OTU) <- gsub(x = names(OTU), pattern = "\\.", replacement = "-")
summary(rowSums(OTU))## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 33 69 1488 235 4549919
summary(colSums(OTU))## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2522 107065 214236 468777 649110 3995008
tax <- read.table("data/BEmosq_classification-1000nt.tsv", header=TRUE, row.names=1, sep="\t", dec=".")
meta <- read.table("data/BEmosq_metadata.csv", header=TRUE, row.names = 1, sep=";", dec=".")
meta <- cbind(Sample=rownames(meta), meta)OTU.UF <- otu_table(as.matrix(OTU), taxa_are_rows=T)
tax.UF <- tax_table(as.matrix(tax))
meta.UF <- sample_data(meta)
BMV <- phyloseq(OTU.UF, tax.UF, meta.UF)
BMV## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4040 taxa and 207 samples ]
## sample_data() Sample Data: [ 207 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 4040 taxa by 7 taxonomic ranks ]
decontam <- as.data.frame(sample_data(BMV))
decontam$LibrarySize <- sample_sums(BMV)
decontam <- decontam[order(decontam$LibrarySize),]
decontam$Index <- seq(nrow(decontam))
ggplot(data=decontam, aes(x=Index, y=LibrarySize, color=Control)) +
geom_point()+
ggtitle("Library sizes")sample_data(BMV)$is.neg <- sample_data(BMV)$Control == "Yes"
contamdf.prev <- isContaminant(BMV, method="prevalence", neg="is.neg")Number of contaminating contigs:
table(contamdf.prev$contaminant)##
## FALSE TRUE
## 3406 634
ps.pa <- transform_sample_counts(BMV, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Control == "Yes", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Control == "No", ps.pa)
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev$contaminant)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")+
ggtitle("Prevalence of contaminants in samples vs NCs")ps.noncontam <- prune_taxa(!contamdf.prev$contaminant, BMV)
ps.noncontam## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3406 taxa and 207 samples ]
## sample_data() Sample Data: [ 207 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 3406 taxa by 7 taxonomic ranks ]
ps.noncontam <- prune_samples(sample_data(BMV)$Control!='Yes', ps.noncontam)
BMV <- ps.noncontam
BMV## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3406 taxa and 197 samples ]
## sample_data() Sample Data: [ 197 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 3406 taxa by 7 taxonomic ranks ]
Remove negative controls from metadata table
meta <- meta[meta$SKA_Subspecies!="control",]BMV.V <- subset_taxa(BMV, Kingdom=="Viruses")
BMV.V## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 214 taxa and 197 samples ]
## sample_data() Sample Data: [ 197 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 214 taxa by 7 taxonomic ranks ]
EVE_phage <- c("Atrato Retro-like virus", "Gurupi chuvirus-like 1", "Aedes aegypti To virus 1",
"Aedes aegypti To virus 2", "Guato virus", "Kaiowa virus", "Atrato Chu-like virus 1",
"Chuvirus Mos8Chu0", "Chibugado virus", "Prokaryotic dsDNA virus sp.")
BMV.V2 <- subset_taxa(BMV.V, !is.element(Species, EVE_phage))
BMV.V2 <- subset_taxa(BMV.V2, Order!="Caudovirales")
BMV.V2 <- subset_taxa(BMV.V2, Phylum!="Phage")
BMV.V2## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 147 taxa and 197 samples ]
## sample_data() Sample Data: [ 197 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 147 taxa by 7 taxonomic ranks ]
Info on sample variables and level of taxonomic ranks:
sample_variables(BMV.V2)## [1] "Sample" "PCR_Subspecies" "SKA_Subspecies" "Species"
## [5] "Sex" "Location" "Control" "visual_genus"
## [9] "visual_species" "lat" "long" "Municipality"
## [13] "is.neg"
rank_names(BMV.V2)## [1] "Kingdom" "Phylum" "Class" "Order" "Family" "Genus" "Species"
Agglomerate taxa on viral Family level:
BMV_family <- BMV_final %>%
tax_glom(taxrank = "Family")
BMV_family## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 26 taxa and 82 samples ]
## sample_data() Sample Data: [ 82 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 26 taxa by 7 taxonomic ranks ]
Calculate relative abundance of viral families:
BMV_family_re.abund <- BMV_family %>%
transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )
BMV_family_re.abund## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 26 taxa and 82 samples ]
## sample_data() Sample Data: [ 82 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 26 taxa by 7 taxonomic ranks ]
Count mosquito species per location:
sp_count <- psmelt(BMV_family_re.abund) %>%
select(Sample,SKA_Subspecies,Municipality) %>%
distinct() %>%
group_by(Municipality) %>%
count(SKA_Subspecies) %>%
pivot_wider(names_from=Municipality, values_from = n) %>%
replace(is.na(.), 0) %>%
rename("Villers_Le_Bouillet" = `Villers-Le-Bouillet`)Create list of locations:
loc_order <- c("Natoye", "Vrasene", "Villers_Le_Bouillet",
"Frameries", "Maasmechelen", "Bertem",
"Leuven", "Eupen")Create list of barplots with number of mosquitoes per location:
plist <- list(ggplot()+theme_void()+#ggtitle("Mosquito species")+
theme(title = element_text(size = 6)))
for (i in loc_order){
plist[[i]] <- ggplot(sp_count, aes_string(x = "SKA_Subspecies", y=i, fill="SKA_Subspecies"))+
geom_col()+
#scale_color_viridis_d(begin=0.4, end=0.9)+
scale_fill_viridis_d(begin=0.4, end=0.9, name="")+
theme_classic()+
theme(legend.position = "none",
axis.ticks.x=element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_text(hjust = 0.5, size=10),
panel.background = element_rect(fill = "transparent"),
plot.background = element_rect(fill = "transparent", color = NA),
axis.line = element_line(colour = 'black', size = 0.25),
axis.ticks = element_line(colour = "black", size = 0.25))+
#ggtitle(gsub("_","-",i))+
scale_y_continuous(limits = c(0,20), expand = c(0,0), position = "right")
}
plist[10] <- list(ggplot()+theme_void())
ggarrange(plotlist = plist[2:9], labels = gsub("_","-",loc_order),
font.label = list(size=10, face="plain"), hjust = 0, vjust = 1)Create colorpalette for viral families:
BMV_smelt <- psmelt(BMV_final)
FamLevel <- levels(as.factor(BMV_smelt[(BMV_smelt$Family!="unclassified"),]$Family))
FamLevel <- c(FamLevel, "unclassified")
BMV_smelt$Family <- factor(BMV_smelt$Family, levels = FamLevel)
myFamCol <- c(stepped(n=20), "#E6550D", "#FD8D3C", "#FDAE6B","lightgrey")
names(myFamCol) <- levels(as.factor(BMV_smelt$Family))Create colors for each mosquito species:
myColors <- viridis::viridis(4,1, begin=0.4, end = 0.9, direction = 1)
names(myColors) <- levels(as.factor(BMV_smelt$SKA_Subspecies))Make table with fungi reads:
BMV_fungi <- subset_taxa(BMV, Phylum %in% c("Ascomycota", "Basidiomycota",
"Chytridiomycota", "Glomeromycota",
"Zygomycota","Neocallimastigomycota",
"Microsporidia"))
BMV_fungi## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 112 taxa and 197 samples ]
## sample_data() Sample Data: [ 197 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 112 taxa by 7 taxonomic ranks ]
BMV_fungi <- tax_glom(BMV_fungi, taxrank = 'Phylum')
sample_variables(BMV_fungi)## [1] "Sample" "PCR_Subspecies" "SKA_Subspecies" "Species"
## [5] "Sex" "Location" "Control" "visual_genus"
## [9] "visual_species" "lat" "long" "Municipality"
## [13] "is.neg"
fungi <- as.data.frame(otu_table(BMV_fungi))
fungi2 <- fungi %>%
mutate(Phylum=rownames(.), .before=1) %>%
pivot_longer(cols = 2:198, names_to="Sample", values_to = "fungi_reads") %>%
pivot_wider(names_from = Phylum, values_from = fungi_reads)
fungi2tax_table(BMV_fungi)## Taxonomy Table: [4 taxa by 7 taxonomic ranks]:
## Kingdom Phylum
## NODE_31_length_1205_cov_6.800532_NEMO53|full "Eukaryota" "Ascomycota"
## NODE_8_length_3532_cov_13.946454_NEMO31|full "Eukaryota" "Chytridiomycota"
## NODE_58_length_1305_cov_1096.565961_NEMO36|full "Eukaryota" "Microsporidia"
## NODE_129_length_1223_cov_14.773124_MEMO055|full "Eukaryota" "Basidiomycota"
## Class Order Family Genus
## NODE_31_length_1205_cov_6.800532_NEMO53|full NA NA NA NA
## NODE_8_length_3532_cov_13.946454_NEMO31|full NA NA NA NA
## NODE_58_length_1305_cov_1096.565961_NEMO36|full NA NA NA NA
## NODE_129_length_1223_cov_14.773124_MEMO055|full NA NA NA NA
## Species
## NODE_31_length_1205_cov_6.800532_NEMO53|full NA
## NODE_8_length_3532_cov_13.946454_NEMO31|full NA
## NODE_58_length_1305_cov_1096.565961_NEMO36|full NA
## NODE_129_length_1223_cov_14.773124_MEMO055|full NA
# fungi.df <- pivot_longer(fungi, cols = 1:197, names_to="Sample", values_to = "fungi_reads")
# fungi.df
# meta <- left_join(meta, fungi.df, by="Sample")
# rownames(meta) <- meta$Sample
# sample_data(BMV_final) <- data.frame(meta)Convert phyloseq object to metagenomeseq object to make heatmap:
BMV_metaseq <- phyloseq_to_metagenomeSeq(BMV_final)Aggregate by species:
BMV_metaseq_species <- aggregateByTaxonomy(BMV_metaseq, lvl = "Species", norm = F,
aggfun = colSums, out = "MRexperiment", alternate = T)Count number of unique viral species:
n_species <- length(unique(featureData(BMV_metaseq_species)$Species))Assign mosquito species for each sample:
mosquito_species <- pData(BMV_metaseq_species)$SKA_SubspeciesAssign colors to location:
location <- pData(BMV_metaseq_species)$Municipality
unique_locations <- length(unique(pData(BMV_metaseq_species)$Municipality))
locColors <- viridis::plasma(unique_locations, 1, begin=0, end = 0.9, direction = 1)
names(locColors) <- levels(as.factor(pData(BMV_metaseq_species)$Municipality))Set heatmap colors:
heatmapCols <- colorRampPalette(brewer.pal(9, "YlOrRd"))(200)Calculate average BLASTx values:
blastx <- read.table("data/BEmosquitoes.tsv", header=T, row.names=1, dec=".", sep="\t")
blastx <- dplyr::select(blastx, 2)
blastx.UF <- otu_table(as.matrix(blastx), taxa_are_rows=T)
blastx.ps <- phyloseq(blastx.UF, tax_table(prune_taxa(taxa_sums(BMV.V2) >= 1, BMV.V2)))
blastx_metaseq <- phyloseq_to_metagenomeSeq(blastx.ps)
blastx_metaseq## MRexperiment (storageMode: environment)
## assayData: 143 features, 1 samples
## element names: counts
## protocolData: none
## phenoData: none
## featureData
## featureNames: NODE_101_length_1206_cov_27.850310_MEMO031|full
## NODE_101_length_1285_cov_17.030629_MEMO115|full ...
## NODE_9_length_5477_cov_48.040926_MEMO031|full (143 total)
## fvarLabels: OTUname Kingdom ... Species (8 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
#Aggregate by species
blastx_mean <- aggregateByTaxonomy(blastx_metaseq, lvl = "Species", norm = F, aggfun = mean, out = "MRexperiment", alternate = T)
blastx_mean## MRexperiment (storageMode: environment)
## assayData: 45 features, 1 samples
## element names: counts
## protocolData: none
## phenoData: none
## featureData
## featureNames: Aedes aegypti anphevirus Aedes alboannulatus
## orthomyxo-like virus ... Yongsan picorna-like virus 2 (45 total)
## fvarLabels: OTUname Kingdom ... Species (8 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
Store average BLASTx identities in dataframe:
rowanno <- as.data.frame(returnAppropriateObj(blastx_mean, log=F, norm=F))
colnames(rowanno)[1] <- "blastx"##
## FALSE TRUE
## 29 16
Create color function for BLASTx values:
col_fun=colorRamp2(c(0,100), c("white","deepskyblue4"))Get Baltimore classification/genome composition for each viral species from ICTV:
taxV <- as.data.frame(tax_table(BMV_final))
baltimore <- read.table("data/ICTV_classification_family.tsv", header=T, sep="\t")
balt_gc <- baltimore %>%
select(Family, Genome.Composition) %>%
distinct() %>%
add_row(Family="Picorna-like", Genome.Composition="ssRNA(+)") %>%
add_row(Family="Orthomyxo-like", Genome.Composition="ssRNA(-)") %>%
add_row(Family="Negeviridae", Genome.Composition="ssRNA(+)") %>%
add_row(Family="Luteoviridae", Genome.Composition="ssRNA(+)")
tax_gc <- left_join(taxV, balt_gc, by="Family") %>%
filter(!Genome.Composition %in% c("ssRNA(+/-)", "ssDNA(+/-)", "ssDNA(+)", "ssRNA")) %>% # remove double genome compositions
mutate(Genome.Composition = case_when(Species == 'Sclerotinia sclerotiorum dsRNA virus 3' ~ 'dsRNA',
Family == 'Reo-like'~'dsRNA',
Species == 'unclassified' ~'unknown',
Family == 'Totiviridae'~'dsRNA',
Species == 'Hubei orthoptera virus 4' ~ 'ssRNA(+)',
Species == 'Hubei odonate virus 15' ~ 'dsRNA',
Species == 'Wuhan insect virus 26' ~ 'dsRNA',
Species == 'Wuhan insect virus 13' ~ 'ssRNA(+)',
Species == "Linepithema humile qinvirus-like virus 1" ~'ssRNA(-)',
Species =="Hubei virga-like virus 23" ~ 'ssRNA(+)',
Species == "Culex mononega like virus 2" ~'ssRNA(-)',
TRUE ~ `Genome.Composition`))
gcanno<-tax_gc %>% select(Genome.Composition, Family)
rownames(gcanno)<-tax_gc$Species
colnames(gcanno) <- c('GC', 'Family')
gcanno<-gcanno[order(rownames(gcanno)) , ,drop=F]
gcanno$Family <- factor(gcanno$Family, levels = FamLevel)alpha <- plot_richness(BMV_final, measures=c("Observed", "Shannon", "Simpson"), x="SKA_Subspecies", color = "SKA_Subspecies")+
geom_boxplot()+
geom_jitter(width=0)+
theme_bw()+
scale_color_viridis_d(begin=0.4, end =0.9, name="", labels=c(expression(paste(italic("Aedes japonicus"), " (n=8)")),
expression(paste(italic("Culex pipiens molestus"), " (n=23)")),
expression(paste(italic("Culex pipiens pipiens"), " (n=48)")),
expression(paste(italic("Culex torrentium"), " (n=3)"))))+
theme(strip.text.x = element_text(size = 10),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.text.align = 0)+
scale_y_continuous(limits = c(0, NA))+
stat_compare_means(method = "wilcox.test", aes_string(x="SKA_Subspecies", y="value",
group="SKA_Subspecies"),
label = "p.format", hide.ns=TRUE, size=3, show.legend = F, tip.length = 0.01,
comparisons = list(c(1, 2),c(1, 3)))
alphaggsave("figures/alpha-diversity-species.pdf", alpha, dpi=300)v.ord <- ordinate(BMV_final, method = "PCoA")
pcoa <- plot_ordination(BMV_final, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
stat_ellipse(type = "norm", linetype = 2, aes_string(group="SKA_Subspecies"), show.legend = F)+
theme_bw()+
theme(panel.grid.minor = element_blank())+
guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
label.theme = element_text(size=10, face="italic")))Permanova test
metadata_vegan <- as(sample_data(BMV_final), "data.frame")
perm <- adonis2(distance(BMV_final, method="bray") ~ SKA_Subspecies*Municipality,
data = metadata_vegan)
permpval<-perm$`Pr(>F)`[1]
Rsq <- perm$R2[1]
pcoa<-pcoa+
ggplot2::annotate(geom="text",x=-.9, y=c(.57,.51,.45), size=c(4,3,3),hjust=0, label =c("Adonis test",
as.expression(bquote(italic(R^2) ~ "=" ~ .(round(Rsq, 2)))),
as.expression(bquote(italic(p) ~ "=" ~ .(pval)))))
pcoaggsave("figures/PCoA-eukaryotic-virome.pdf", dpi=300)v.ord <- ordinate(BMV_final, method = "NMDS", k=2)## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.007200553
## Run 1 stress 0.005752978
## ... New best solution
## ... Procrustes: rmse 0.1006646 max resid 0.2235644
## Run 2 stress 0.003279026
## ... New best solution
## ... Procrustes: rmse 0.09850435 max resid 0.2686155
## Run 3 stress 0.0029636
## ... New best solution
## ... Procrustes: rmse 0.09741204 max resid 0.3359344
## Run 4 stress 0.004352118
## Run 5 stress 0.005299531
## Run 6 stress 0.002477427
## ... New best solution
## ... Procrustes: rmse 0.09401825 max resid 0.3513714
## Run 7 stress 0.001630306
## ... New best solution
## ... Procrustes: rmse 0.1019217 max resid 0.3358118
## Run 8 stress 0.003565263
## Run 9 stress 0.001991839
## ... Procrustes: rmse 0.1099615 max resid 0.3974304
## Run 10 stress 0.00530823
## Run 11 stress 0.001962359
## ... Procrustes: rmse 0.1063237 max resid 0.4086584
## Run 12 stress 0.002403979
## Run 13 stress 0.003673851
## Run 14 stress 0.003166973
## Run 15 stress 0.001342787
## ... New best solution
## ... Procrustes: rmse 0.1048725 max resid 0.3401675
## Run 16 stress 0.001550655
## ... Procrustes: rmse 0.103021 max resid 0.4376088
## Run 17 stress 0.003068911
## Run 18 stress 0.003268986
## Run 19 stress 0.002562401
## Run 20 stress 0.001887704
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
nmds <- plot_ordination(BMV_final, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
#stat_ellipse(type = "norm", linetype = 2, aes_string(group="SKA_Subspecies"), show.legend = F)+
theme_bw()+
theme(panel.grid.minor = element_blank())+
guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
label.theme = element_text(size=10, face="italic")))
nmdsggsave("figures/NMDS-eukaryotic-virome.pdf", nmds, dpi=300)BMV_no_singletons <- BMV_final
for (i in c("MEMO014","MEMO078","MEMO145","MEMO146","MEMO149","MEMO084","MEMO018","MEMO006",
"NEMO27","NEMO30","NEMO07","NEMO46","NEMO51","MEMO127","MEMO008")) {
BMV_no_singletons <- subset_samples(BMV_no_singletons, Sample != i)
}
BMV_no_singletons## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 45 taxa and 68 samples ]
## sample_data() Sample Data: [ 68 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 45 taxa by 7 taxonomic ranks ]
v.ord <- ordinate(BMV_no_singletons, method = "NMDS", k=2)## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.006486394
## Run 1 stress 0.007532973
## Run 2 stress 0.01218761
## Run 3 stress 0.01419612
## Run 4 stress 0.004599115
## ... New best solution
## ... Procrustes: rmse 0.0563464 max resid 0.1385764
## Run 5 stress 0.008878224
## Run 6 stress 0.01143861
## Run 7 stress 0.01069373
## Run 8 stress 0.01296605
## Run 9 stress 0.006977745
## Run 10 stress 0.005414834
## Run 11 stress 0.0103929
## Run 12 stress 0.01093644
## Run 13 stress 0.01094168
## Run 14 stress 0.01515388
## Run 15 stress 0.01384089
## Run 16 stress 0.01258793
## Run 17 stress 0.01595973
## Run 18 stress 0.008897728
## Run 19 stress 0.03007609
## Run 20 stress 0.008167468
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
nmds_ns <- plot_ordination(BMV_no_singletons, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
#stat_ellipse(type = "norm", linetype = 2, aes_string(group="SKA_Subspecies"), show.legend = F)+
theme_bw()+
theme(panel.grid.minor = element_blank())+
guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
label.theme = element_text(size=10, face="italic")))
nmds_nsggsave("figures/NMDS-eukaryotic-virome-no-singletons.pdf", nmds_ns, dpi=300)
v.ord <- ordinate(BMV_no_singletons, method = "PCoA")
pcoa_ns <- plot_ordination(BMV_no_singletons, v.ord, type="samples", color="SKA_Subspecies", shape = "Municipality")+
scale_shape_manual(values = c(15,16,17,3:8), guide =guide_legend(label.theme = element_text(size=10)), name="Location")+
scale_color_viridis_d(begin=0, end =1, name="Mosquito species")+
stat_ellipse(type = "norm", linetype = 2, aes_string(group="SKA_Subspecies"), show.legend = F)+
theme_bw()+
theme(panel.grid.minor = element_blank())+
guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
label.theme = element_text(size=10, face="italic")))
pcoa_nsPermanova test
metadata_vegan_ns <- as(sample_data(BMV_no_singletons), "data.frame")
perm_ns <- adonis2(distance(BMV_no_singletons, method="bray") ~ SKA_Subspecies*Municipality,
data = metadata_vegan_ns)
perm_nspval_ns<-perm_ns$`Pr(>F)`[1]
Rsq_ns <- perm_ns$R2[1]
ord.legend <- cowplot::get_legend(pcoa)
alpha.legend <- cowplot::get_legend(alpha)
plots <- cowplot::align_plots(alpha+theme(legend.position = "none"),
pcoa+theme(legend.position = "none"), align = 'v', axis = 'l')
bottom<-cowplot::plot_grid(plots[[2]],
nmds+theme(legend.position = "none"), labels=c("B","C"))
cowplot::plot_grid(plots[[1]], alpha.legend, bottom, ord.legend, labels = c('A', ''), ncol=2, rel_widths = c(1, .3))#ggarrange(alpha, ggarrange(pcoa, nmds, labels=c("B","C"), common.legend = T, legend = "right"), labels="A", ncol=1)
ggsave("figures/combined-alpha-ordination.pdf", dpi=300)phylobar_location <- merge_samples(BMV_family, "Municipality")
phylobar_location_re.abund <- phylobar_location %>%
transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )
lc <- plot_relabund(phylobar_location_re.abund, fill = "Family") +
geom_bar(aes(fill=Family), stat="identity", position="stack")+
ylab("Relative abundance (%)")+
xlab("")+
theme_bw()+
theme(axis.text.x = element_text(angle=0, size = 10, vjust = 0.5, hjust = 0.5),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12, vjust=0.5),
axis.title.x = element_text(size = 12,hjust = 0.5),
axis.ticks.y = element_blank(),
legend.text = element_text(face="italic"))+
scale_fill_manual(values = myFamCol, name="Viral family")+
scale_y_continuous(expand = c(0,0))+
scale_x_discrete(limits=rev(gsub("_","-",loc_order)))+
coord_flip()Combine viral family barplot with mosquito species barplots from each location:
lc2<-ggarrange(lc+theme(legend.position = "none"), ggarrange(plotlist=plist, nrow=10, heights = c(0.2, rep(1,8),0.6)), ncol=2, widths = c(1,0.1))
ggsave("figures/viralfamily_barplot_loc.pdf", lc2, height = 6.92, width = 11.2, dpi=300)phylobar_species <- merge_samples(BMV_family, "SKA_Subspecies")
phylobar_species_re.abund <- phylobar_species %>%
transform_sample_counts(function(x) {round(100*(x/sum(x)),5)} )
sp <- plot_relabund(phylobar_species_re.abund, fill = "Family") +
geom_bar(aes(fill=Family), stat="identity", position="stack")+
ylab("Relative abundance (%)")+
xlab("")+
theme_bw()+
theme(axis.text.x = element_text(angle=0, size = 10, vjust = 0.5, hjust = 0.5, face="italic"),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12, vjust=0.5),
axis.title.x = element_text(size = 12,hjust = 0.5),
axis.ticks.x = element_blank(),
legend.text = element_text(face="italic"),
legend.title = element_blank())+
scale_fill_manual(values = myFamCol, name="Viral family")+
scale_y_continuous(expand = c(0,0))+
#scale_x_discrete(guide = guide_axis(n.dodge=2))
scale_x_discrete(labels=c("Ae. japonicus", "Cx. p. molestus", "Cx. p. pipiens", "Cx. torrentium"))
ggsave("figures/viralfamily_barplot_species.pdf", sp , height = 6.92, width = 11.2, dpi=300)Create combined legend for viral families and mosquito species:
sp2 <- plot_relabund(phylobar_species_re.abund, fill="Family", color="Sample")+
theme_bw()+
theme(legend.text = element_text(face="italic"),
legend.title = element_blank(),
legend.position = "top",
legend.box.just = "bottom")+
scale_fill_manual(values = myFamCol, name="a")+
scale_color_manual(values = myColors, name="b")+
guides(fill=guide_legend(nrow = 4),
col=guide_legend(override.aes =list(fill = myColors), direction = "vertical"))
sp_leg <- get_legend(sp2)
sp2ggarrange(sp, lc2, common.legend = T, legend = "top", labels ='AUTO',
font.label = list(size = 16), widths = c(0.5,1), legend.grob = sp_leg)ggsave("figures/viralfamily_barplot.pdf", width = 15, height=9, dpi=300)#p.fungi<-log10(pData(BMV_metaseq_species)$fungi_reads+1)
#p.fungi<-pData(BMV_metaseq_species)$fungi_reads
#p.fungi<-as.matrix(fungi2[fungi2$Sample %in% pData(BMV_metaseq_species)$Sample,2:5])
p.fungi<-log10(as.matrix(fungi2[fungi2$Sample %in% pData(BMV_metaseq_species)$Sample,2:5]))
p.fungi[p.fungi==-Inf]<-NA
colnames(p.fungi)<-as.data.frame(tax_table(BMV_fungi))$Phylum
left_ra=rowAnnotation('Family'=gcanno$Family,
"Blastx"= rowanno$blastx,
col=list('Family'=myFamCol,
"Blastx"=col_fun),
show_annotation_name=T,
annotation_name_side = "top",
annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
annotation_name_rot = 270,
annotation_legend_param=list("Blastx" = list(title ="Blastx % Identity",
direction="horizontal",
at=c(0,25,50,75,100)),
"Family"=list(title="Viral family", labels_gp = gpar(fontface="italic"))))
#Draw heatmap
column_ha = HeatmapAnnotation(Location=location,
'Mosquito species' = mosquito_species,
Fungi = anno_points(p.fungi, ylim = c(0, 6),
axis_param = list(side = "right"),
gp = gpar(fill = c(2:4,7), col = c(2:4,7)),
pch=1:length(colnames(p.fungi))),
show_annotation_name = T,
annotation_label = gt_render(c("Location", "Species", "log<sub>10</sub>(Fungi)")),
annotation_name_side = "right",
annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
annotation_name_rot = 0,
annotation_legend_param=list('Mosquito species'= list(title ='Mosquito species',
labels_gp = gpar(fontface="italic")),
'Location' = list(title='Location')),
col = list('Mosquito species'=c(myColors), Location=c(locColors)))
#n depends on the amount of taxa in 'BMV_metaseq_species'
hm <- plot_abundance(BMV_metaseq_species, n = n_species, log = T, norm = F, colclust = "bray",
col = heatmapCols, name = "Log2 Read Counts",
top_annotation=column_ha,
row_names_gp = gpar(fontsize = 8),
show_column_names = FALSE,
heatmap_legend_param = list(direction = "horizontal"),
cluster_rows = FALSE,
cluster_row_slices = FALSE,
row_split=factor(gcanno$GC,
levels = c("dsDNA","ssDNA","dsRNA","ssRNA(+)","ssRNA(-)","unknown")),
row_order=tax_gc[with(tax_gc, order(tax_gc$Genome.Composition, Family)),"Species"],
left_annotation=left_ra,
row_title_gp = gpar(fontsize=10),
row_title_rot=0,
border=T)Legends
lgd_list<-list(Legend(labels = colnames(p.fungi), title = "Fungi", type = "points", pch = 1:length(colnames(p.fungi)),
legend_gp = gpar(col = c(2:4,7)), background ="transparent"))draw(hm, heatmap_legend_side = "left", annotation_legend_side = "left",
merge_legend = T, legend_grouping="original",
annotation_legend_list = lgd_list)library(ggh4x)
qPCR <- read_csv("data/BMV-RT-qPCR-analysis.csv", col_names = T)Recalculate quantity based on mosquito dilutions:
qPCR <- qPCR %>%
mutate(Quantity = case_when(str_detect(Sample, "MEMO") ~ qPCR$"Quantity Mean" * 40,
str_detect(Sample, "NEMO") ~ qPCR$"Quantity Mean" * 100))Divide quantity of Xanthi chryso-like virus by 2 because it is a dsRNA virus:
qPCR <- qPCR %>%
mutate(Quantity = case_when(str_detect(Target, "XCV") ~ qPCR$Quantity/2,
TRUE ~ Quantity))Replace NA to 0:
qPCR_noNA <- qPCR %>% mutate_all(~replace(., is.na(.), 0))Replace abbreviations for full names:
qPCR_noNA <- qPCR_noNA %>% mutate(Target, Target= case_when(Target=="CPV"~"Culex phasmavirus (CPV)",
Target=="XCV"~"Xanthi chryso-like virus (XCV)",
Target=="WMV6"~"Wuhan Mosquito Virus 6 (WMV6)",
Target=="WMV4"~"Wuhan Mosquito Virus 4 (WMV4)",
Target=="Daeseongdong"~"Daeseongdong virus 2 (DV2)",
Target=="HMV4"~"Hubei mosquito virus 4 (HMV4)",
TRUE ~ Target))Merge metadata with qPCR data: Abrreviate locations to fit the names on the plots later on.
metadata <- meta %>% mutate(Municipality = case_when(Municipality == 'Frameries' ~ 'Fr',
Municipality == 'Dilsen-Stokkem' ~ 'DS',
Municipality == 'Kallo' ~ 'K',
TRUE ~ Municipality))
metaqPCR <- merge.data.frame(qPCR_noNA, metadata, by="Sample")Count number of tested samples per mosquito species:
metaqPCR %>%
select(Sample, SKA_Subspecies) %>%
distinct() %>%
count(SKA_Subspecies)legendlabels <- c(expression(paste(italic("Aedes japonicus"), " (n=8)")),
expression(paste(italic("Culex pipiens molestus"), " (n=47)")),
expression(paste(italic("Culex pipiens pipiens"), " (n=127)")),
expression(paste(italic("Culex torrentium"), " (n=16)")))
qPCRviolin <- ggplot(metaqPCR, aes(x=SKA_Subspecies, y=Quantity))+
geom_violin(aes(fill=SKA_Subspecies, color=SKA_Subspecies), position="identity")+
geom_jitter(aes(color=SKA_Subspecies), size=1, width=0.1)+
theme_bw()+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
#panel.grid.major = element_blank(),
panel.grid.major.y = element_blank(),
legend.text.align = 0)+
scale_y_continuous(expand=expansion(mult = c(0, 0), add = c(0, 0.1)),
trans=scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^seq(2,8,2)),
labels = scientific_10(c(0, 10^seq(2,8,2))))+
xlab(label = "")+
ylab(label = "Viral copies per mosquito")+
scale_fill_viridis_d(begin=0.4, end =0.9, name="", alpha = 0.5,
labels=legendlabels)+
scale_color_viridis_d(begin=0.4, end =0.9, name="",
labels=legendlabels)+
guides(fill = guide_legend(override.aes = list(alpha = 1)))+
facet_wrap(~Target, nrow=2, ncol=3)
ggsave("figures/qPCR-violin.pdf", dpi=300)Plot qPCR overview per sample:
qPCRoverview <- ggplot(metaqPCR,
aes(x=Sample, y=Quantity))+
facet_nested(Target~Municipality,
space="free",
scales = "free_x",
labeller = labeller(Target = label_wrap_gen(10), Municipality=label_wrap_gen(5)))+
geom_col(aes(fill=SKA_Subspecies), position="identity", alpha=1)+
theme_bw()+
theme(axis.text.x = element_blank(),
panel.grid.major = element_blank())+
scale_y_continuous(expand=expansion(mult = c(0, 0), add = c(0, 0.1)),
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 10^seq(2,8,2)),
labels = scientific_10(c(0, 10^seq(2,8,2))))+
xlab(label = "Sample")+
ylab(label = "Viral copies per mosquito")+
scale_fill_viridis_d(begin=0.4, end =0.9, name="")+
guides(fill = guide_legend(override.aes = list(alpha = 1),
label.theme = element_text(size=10, angle = 0, face = "italic")))
ggsave("figures/qPCR_overview.pdf", width = 18.7, height = 10.3, units = "in", dpi=300)DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo
DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo
DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo
DS=Dilsen-Stokkem, Fr=Frameries, K=Kallo
phylo_path <- here("phylogenetics")
phylo_packages <- c('phytools', 'ggtree', 'treeio', 'rphylopic',
'ggimage', 'aplot', 'rcartocolor')
lapply(phylo_packages, library, character.only = TRUE)Determine shape based on which classification was used
shape <- c(ICTV=16, NCBI=15, NODE=17)
shape## ICTV NCBI NODE
## 16 15 17
meta.orthomyxo <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Orthomyxoviridae/"), pattern = "*.csv", full.names = T), read_csv))
orthomyxo.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Orthomyxoviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
orthomyxo <- read.iqtree(here(phylo_path, "Orthomyxoviridae/orthomyxoviridae.treefile"))
orthomyxo@phylo <- phytools::midpoint.root(orthomyxo@phylo)
meta.orthomyxo <- meta.orthomyxo %>%
mutate(Classification = case_when(Accession %in% orthomyxo.ictv$Accession ~ "ICTV",
TRUE ~ "NCBI"))
orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "_", replacement=" ")
orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "P ", replacement="P_")
orthomyxo@phylo$tip.label <- gsub(orthomyxo@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")
orthomyxo_clean <- clean_phylometa(orthomyxo, metadata = meta.orthomyxo)
orthomyxo_ictv_clean <- clean_phylometa(orthomyxo, metadata = orthomyxo.ictv)
orthomyxo_tree <- group_phylotaxa(orthomyxo, orthomyxo_clean, group = c("Family","Genus", "Host", "Geo_Location", "Classification"))
orthomyxo_tree## 'treedata' S4 object that stored information
## of
## '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Orthomyxoviridae/orthomyxoviridae.treefile'.
##
## ...@ phylo:
##
## Phylogenetic tree with 37 tips and 36 internal nodes.
##
## Tip labels:
## lcl NODE 2 length 2469 cov 48.088211 MEMO067, YP_009508043.1|Quaranfil
## quaranjavirus, YP_009665204.1|Johnston Atoll quaranjavirus, YP_145804.1|Salmon
## isavirus, YP_145794.1|Thogoto thogotovirus, YP_009352882.1|Dhori thogotovirus,
## ...
## Node labels:
## Root, , 100, 95, 100, 100, ...
##
## Rooted; includes branch lengths.
##
## with the following features available:
## 'SH_aLRT', 'UFboot'.
##
## # The associated data tibble abstraction: 73 × 10
## # The 'node', 'label' and 'isTip' are from the phylo tree.
## node label Family Genus Host Geo_Location Classification isTip SH_aLRT
## <int> <chr> <fct> <fct> <fct> <fct> <fct> <lgl> <dbl>
## 1 1 lcl NODE … NODE NODE NODE NODE NODE TRUE NA
## 2 2 YP_009508… Ortho… Quar… Arga… Afghanistan ICTV TRUE NA
## 3 3 YP_009665… Ortho… Quar… Ixod… New Zealand ICTV TRUE NA
## 4 4 YP_145804… Ortho… Isav… unkn… Canada ICTV TRUE NA
## 5 5 YP_145794… Ortho… Thog… unkn… unknown ICTV TRUE NA
## 6 6 YP_009352… Ortho… Thog… unkn… unknown ICTV TRUE NA
## 7 7 NP_056657… Ortho… Beta… unkn… unknown ICTV TRUE NA
## 8 8 NP_040985… Ortho… Alph… unkn… Puerto Rico ICTV TRUE NA
## 9 9 YP_009449… Ortho… Delt… Sus … USA ICTV TRUE NA
## 10 10 YP_089653… Ortho… Gamm… unkn… unknown ICTV TRUE NA
## # ℹ 63 more rows
## # ℹ 1 more variable: UFboot <dbl>
Give clean metadata ictv!
orthocladedf <- mrca_ictv(tree=orthomyxo_tree, meta = orthomyxo_ictv_clean,
group="Genus")
famvec <- sort(unique(orthomyxo_clean$Genus))
#col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
col <- c(rcartocolor::carto_pal(n = length(famvec), "Vivid"))
col <- rev(col)[-1]
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
orthomyxo_tree@phylo$tip.label <- gsub(orthomyxo_tree@phylo$tip.label, pattern = "lcl [ORF]*[0-9]* *", replacement="", perl = T)
p <- plot_phylotree(orthomyxo_tree, col=col, shape=shape, cladedf = orthocladedf, labels=c(famvec, "NODE"),
plainlabels = c("unclassified", "Belgian mosquitoes"), align=F, group="Genus")+
#geom_nodepoint(aes(subset= node %in% orthocladedf$node), size=1, shape=18)+
scale_y_reverse()+
xlim(-.05,4.5)
orthoplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
theme(legend.text = element_text(vjust = .4))
orthoplotggsave(filename = here(phylo_path, "Orthomyxoviridae/orthomyxoviridae.pdf"), orthoplot, dpi=1200, width=6.875)
orthoplot_minimal <- plot_minimal_phylotree(orthomyxo_tree, align=T)+
scale_y_reverse()+
xlim(-.05,5)
orthoplot_minimalggsave(filename = here(phylo_path, "Orthomyxoviridae/orthomyxoviridae_minimal.pdf"), orthoplot_minimal, dpi=1200, width=6.875)meta.reo <- do.call(rbind, lapply(list.files(path=here::here(phylo_path, "Reoviridae/"), pattern = "*.csv", full.names = T), read_csv))
reo.ictv <- do.call(rbind, lapply(list.files(path=here::here(phylo_path, "Reoviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
reo <- read.iqtree(here(phylo_path, "Reoviridae/reoviridae.treefile"))
reo@phylo <- phytools::midpoint.root(reo@phylo)
meta.reo <- meta.reo %>%
mutate(Classification = case_when(Accession %in% reo.ictv$Accession ~ "ICTV",
TRUE ~ "NCBI"))
reo@phylo$tip.label <- gsub(reo@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")
reo_clean <- clean_phylometa(reo, metadata = meta.reo)
reo_ictv_clean <- clean_phylometa(reo, metadata = reo.ictv)
reo_tree <- group_phylotaxa(reo, reo_clean, group = c("Family","Genus", "Host", "Geo_Location", "Classification"))
reo_tree@phylo$tip.label## [1] "AFH41509.1|Eubenangee virus"
## [2] "AGX00987.1|Wallal virus"
## [3] "AIT55713.1|Warrego virus"
## [4] "AGY34642.1|Changuinola virus"
## [5] "ACJ06234.1|Equine encephalosis virus"
## [6] "AFX73387.1|Orungo virus"
## [7] "AFX73376.1|Lebombo virus"
## [8] "AAC40586.1|African horse sickness virus"
## [9] "QCU80060.1|Palyam virus"
## [10] "AVO64741.1|Ninarumi virus"
## [11] "AEE98368.1|Umatilla virus"
## [12] "BAP18631.1|Umatilla virus"
## [13] "YP_002925132.1|Stretch Lagoon orbivirus"
## [14] "UBB38843.1|Wongorr virus"
## [15] "AXF35757.1|Kammavanpettai virus"
## [16] "QWF36631.1|Mudumu virus"
## [17] "ANH10670.1|Parry's Lagoon virus"
## [18] "QWE80471.1|Corriparta virus"
## [19] "lcl ORF1 NODE 1 length 3938 cov 956.313131 NEMO25 3933 70"
## [20] "ABB72770.1|Peruvian horse sickness virus"
## [21] "QNH88362.1|Peruvian horse sickness virus"
## [22] "AAW28767.1|Yunnan orbivirus"
## [23] "BCL59280.1|Yunnan orbivirus"
## [24] "UBT83574.1|Middle Point orbivirus"
## [25] "BCB92306.1|Yonaguni orbivirus"
## [26] "AYA60488.1|Mobuck virus"
## [27] "QRW41814.1|Lobuck virus"
## [28] "QCQ85356.1|CHeRI orbivirus 2-2"
## [29] "BCL59290.1|Guangxi orbivirus"
## [30] "ADM88592.1|Great Island virus"
## [31] "ATW68806.1|Okhotskiy virus"
## [32] "AHM93463.1|Great Island virus"
## [33] "AYM94291.1|Great Island virus"
## [34] "AKP24073.1|Wad Medani virus"
## [35] "AKP24085.1|Chenuda virus"
## [36] "ATW68818.1|Baku virus"
## [37] "AKP24097.1|Chobar Gorge virus"
## [38] "QBL15251.1|Chobar Gorge virus"
## [39] "QBL15263.1|Chobar Gorge virus"
## [40] "AGE32260.1|Sathuvachari virus"
## [41] "AGE32270.1|Orbivirus JKT-8132"
## [42] "BBE08033.1|Sathuvachari virus"
## [43] "AVO64731.1|Big Cypress virus"
## [44] "AZJ37597.1|Skunk River virus"
## [45] "QNS17459.1|Serbia reo-like virus 2"
## [46] "QVU40006.1|Amate virus"
## [47] "AAG34363.1|St Croix River virus"
## [48] "APG79108.1|Hubei lepidoptera virus 4"
## [49] "AQU42768.1|Morris orbivirus"
## [50] "APG79103.1|Hubei lepidoptera virus 5"
## [51] "QTW97832.1|Hubei reo-like virus 12"
## [52] "ASU50444.1|Anopheles annulipes orbivirus"
## [53] "APG79077.1|Hubei tetragnatha maxillosa virus 9"
## [54] "APG79144.1|Hubei odonate virus 15"
## [55] "ASU43982.1|Anopheles hinesorum orbivirus"
## [56] "lcl ORF2 NODE 1 length 4140 cov 359.250554 MEMO015 1249 4092 pasted"
## [57] "QNS17458.1|Serbia reo-like virus 1"
## [58] "DAZ85690.1|Aedes orbi-like virus"
## [59] "QRW41688.1|Lasigmu virus"
## [60] "QIJ70109.1|Hubei reo-like virus 14"
## [61] "AVM87459.1|Wenling scaldfish reovirus"
## [62] "BAF49639.1|Rice gall dwarf virus"
## [63] "AAB18743.1|Rice dwarf virus"
## [64] "AAZ94068.1|Aedes pseudoscutellaris reovirus"
## [65] "ACA53380.1|Cypovirus 16"
## [66] "AHJ14791.1|Cypovirus 2"
## [67] "AAK73521.1|Cypovirus 1"
## [68] "AAK73087.1|Cypovirus 14"
## [69] "AAO73182.1|Mal de Rio Cuarto virus"
## [70] "AAK40249.1|Fiji disease virus"
## [71] "BAA08542.1|Nilaparvata lugens reovirus"
## [72] "AAG34362.1|Colorado tick fever coltivirus"
## [73] "ATG88075.1|Tai Forest coltivirus"
## [74] "AXG65493.1|Kundal coltivirus"
## [75] "BBA54735.1|Tarumizu coltivirus"
## [76] "AAW29084.1|Liao ning virus"
## [77] "AAF78848.1|Kadipiro virus"
## [78] "AAF78849.1|Banna virus"
## [79] "AKC01920.1|Eriocheir sinensis reovirus"
Give clean metadata ictv!
reocladedf <- mrca_ictv(tree=reo_tree, meta =reo_ictv_clean,
group="Genus")
famvec <- sort(unique(reo_clean$Family))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
#col <- c(rcartocolor::carto_pal(n = length(famvec), "Vivid"))
#col <- rev(col)[-1]
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
reo_tree@phylo$tip.label <- gsub(reo_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)p <- plot_phylotree(reo_tree, col=col, shape=shape, cladedf = reocladedf, labels=c(famvec, "NODE"),
plainlabels = c("unclassified", "Belgian mosquitoes"), align=F, group="Family")+
#geom_nodepoint(aes(subset = node %in% reocladedf$node), size=1, shape=18)+
scale_y_reverse()+
xlim(-.05,7)
reoplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
theme(legend.text = element_text(vjust = .4),
legend.position = c(0.7,.3))
reoplotggsave(filename = here(phylo_path, "Reoviridae/reoviridae.pdf"), reoplot, dpi=1200, width=6.875)
reoplot_minimal <- plot_minimal_phylotree(reo_tree, align=T)+
scale_y_reverse()+
xlim(-.05,9)
reoplot_minimalggsave(filename = here(phylo_path, "Reoviridae/reoviridae_minimal.pdf"), reoplot_minimal, dpi=1200, width=6.875)Rerun tree with hmv4!
meta.tombus <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*.csv", full.names = T), read_csv))
tombus.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
tombus <- read.iqtree(here(phylo_path, "Tombusviridae/tombusviridae2.treefile"))
tombus@phylo <- phytools::midpoint.root(tombus@phylo)
tombus@phylo$tip.label <- gsub(tombus@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")
meta.tombus <- meta.tombus %>%
mutate(Classification = case_when(Accession %in% tombus.ictv$Accession ~ "ICTV",
TRUE ~ "NCBI"))
tombus_clean <- clean_phylometa(tombus, metadata = meta.tombus)
tombus_ictv_clean <- clean_phylometa(tombus, metadata = tombus.ictv)
tombus_tree <- group_phylotaxa(tombus, tombus_clean,
group = c("Family","Genus", "Host", "Geo_Location", "Classification"))Give clean metadata ictv!
tombuscladedf <- mrca_ictv(tree = tombus_tree, meta = tombus_ictv_clean,
group="Genus")
famvec <- sort(unique(tombus_clean$Genus))
#col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
col <- c(rcartocolor::carto_pal(n = length(famvec), "Prism"))
col <- rev(col)[-1]
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
tombus_tree@phylo$tip.label <- gsub(tombus_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)
tombus_tree## 'treedata' S4 object that stored information
## of
## '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes
## (2022)/BMV-analysis/phylogenetics/Tombusviridae/tombusviridae2.treefile'.
##
## ...@ phylo:
##
## Phylogenetic tree with 94 tips and 93 internal nodes.
##
## Tip labels:
## QRW42294.1|Marma virus, NODE 1 length 3084 cov 15.137346 MEMO008 1944 2999
## unnamed protein product, BBQ04781.1|Culex inatomii luteo-like virus,
## YP_009337877.1|Hubei mosquito virus 2, QOW03291.1|Kisumu mosquito virus,
## QZZ63345.1|Leuven Sobemo-like virus 1, ...
## Node labels:
## Root, 88, 49, 48, 60, 32, ...
##
## Rooted; includes branch lengths.
##
## with the following features available:
## 'SH_aLRT', 'UFboot'.
##
## # The associated data tibble abstraction: 187 × 10
## # The 'node', 'label' and 'isTip' are from the phylo tree.
## node label Family Genus Host Geo_Location Classification isTip SH_aLRT
## <int> <chr> <fct> <fct> <fct> <fct> <fct> <lgl> <dbl>
## 1 1 QRW42294.… Luteo… uncl… unkn… USA NCBI TRUE NA
## 2 2 NODE 1 le… NODE NODE NODE NODE NODE TRUE NA
## 3 3 BBQ04781.… uncla… uncl… Cule… Japan NCBI TRUE NA
## 4 4 YP_009337… uncla… uncl… Culi… China NCBI TRUE NA
## 5 5 QOW03291.… uncla… uncl… Aede… Kenya NCBI TRUE NA
## 6 6 QZZ63345.… Solem… uncl… unkn… Belgium NCBI TRUE NA
## 7 7 YP_009337… uncla… uncl… Culi… China NCBI TRUE NA
## 8 8 AXQ04793.… Luteo… uncl… unkn… USA NCBI TRUE NA
## 9 9 QZZ63315.… Luteo… uncl… unkn… Belgium NCBI TRUE NA
## 10 10 YP_009330… uncla… uncl… Scut… China NCBI TRUE NA
## # ℹ 177 more rows
## # ℹ 1 more variable: UFboot <dbl>
p <- plot_phylotree(tombus_tree, col=col, shape=shape, cladedf = tombuscladedf,
labels=c(famvec, "NODE"),
plainlabels = c("unclassified", "Belgian mosquitoes"), align=F,
group="Genus")+
#geom_nodepoint(aes(subset= node %in% tombuscladedf$node), size=1, shape=18)+
#geom_nodepoint(aes(fill=as.numeric(label), subset = !is.na(as.numeric(label))), shape=23, color="transparent", size=1)+
#scale_fill_gradientn(colours = c("red2","orange","gold1","forestgreen")) +
scale_y_reverse()+
xlim(-.05,4)
tombusplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
theme(legend.text = element_text(vjust = .4),
legend.position = c(0.1,.2))+
guides(shape="none")
tombusplotggsave(filename = here(phylo_path, "Tombusviridae/tombusviridae.pdf"), tombusplot, dpi=1200, width=6.875)
#ggsave(filename = here(phylo_path, "Tombusviridae/tombusviridae_meeting.pdf"), tombusplot+theme(legend.position="left"), dpi=300, width=5.6, height=3.5)
tombusplot_minimal <- plot_minimal_phylotree(tombus_tree, align=T)+
scale_y_reverse()+
xlim(-.05,5)
tombusplot_minimalggsave(filename = here(phylo_path, "Tombusviridae/tombusviridae_minimal.pdf"), tombusplot_minimal, dpi=1200, width=6.875)meta.negev <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Negevirus/"), pattern = "*.csv", full.names = T), read_csv))
#tombus.ictv <- do.call(rbind, lapply(list.files(path=here(phylo_path, "Tombusviridae/"), pattern = "*_ictv.csv", full.names = T), read_csv))
negev <- read.iqtree(here(phylo_path, "Negevirus/negeviruses.treefile"))
negev@phylo <- phytools::midpoint.root(negev@phylo)
meta.negev <- meta.negev %>%
mutate(Classification = "NCBI")
negev@phylo$tip.label <- gsub(negev@phylo$tip.label, pattern = "lcl\\|", replacement="lcl_")
negev_clean <- clean_phylometa_negev(negev, metadata = meta.negev)
#negev_ictv_clean <- clean_phylometa(negev, metadata = negev.ictv)
negev_tree <- group_phylotaxa(negev, negev_clean,
group = c("Family","Genus", "Host", "Geo_Location", "Classification"))Give clean metadata ictv!
# negevcladedf <- mrca_ictv(tree = negev_tree, meta = negev_ictv_clean,
# group="Genus")
famvec <- sort(unique(negev_clean$Genus))
col <- c(viridis::plasma(n = length(famvec)-1, begin=0.1, end=0.9))
names(col) <- famvec[famvec!="unclassified"]
col <- c(col, unclassified="grey", NODE="#43BF71FF")
negev_tree@phylo$tip.label <- gsub(negev_tree@phylo$tip.label, pattern = "lcl ORF[0-9]* ", replacement="", perl = T)
negev_tree## 'treedata' S4 object that stored information
## of
## '/Users/lander/Library/CloudStorage/OneDrive-KULeuven/Documents/Manuscripts/Belgian
## mosquitoes (2022)/BMV-analysis/phylogenetics/Negevirus/negeviruses.treefile'.
##
## ...@ phylo:
##
## Phylogenetic tree with 36 tips and 35 internal nodes.
##
## Tip labels:
## BAS69360.1|Negevirus Nona 1, YP_009182191.1|Daeseongdong virus 1,
## YP_009552739.1|Ying Kou virus, QOK99941.1|Mekrijarvi negevirus,
## YP_009351827.1|San Bernardo virus, AQM55484.1|Brejeira virus, ...
## Node labels:
## Root, 76, 99, 96, 99, 100, ...
##
## Rooted; includes branch lengths.
##
## with the following features available:
## 'SH_aLRT', 'UFboot'.
##
## # The associated data tibble abstraction: 71 × 10
## # The 'node', 'label' and 'isTip' are from the phylo tree.
## node label Family Genus Host Geo_Location Classification isTip SH_aLRT
## <int> <chr> <fct> <fct> <fct> <fct> <fct> <lgl> <dbl>
## 1 1 BAS69360.… uncla… Nege… Okus… Japan NCBI TRUE NA
## 2 2 YP_009182… uncla… uncl… Cule… South Korea NCBI TRUE NA
## 3 3 YP_009552… uncla… Nege… Cule… China NCBI TRUE NA
## 4 4 QOK99941.… uncla… Nege… Aede… Finland NCBI TRUE NA
## 5 5 YP_009351… uncla… uncl… Culi… Colombia NCBI TRUE NA
## 6 6 AQM55484.… uncla… Nege… Culex Brazil NCBI TRUE NA
## 7 7 YP_009351… uncla… Nege… Culex Colombia NCBI TRUE NA
## 8 8 CCV01575.… uncla… Nege… Ochl… unknown NCBI TRUE NA
## 9 9 NODE 1 le… NODE NODE NODE NODE NODE TRUE NA
## 10 10 AXQ04831.… Nodav… uncl… Culex USA NCBI TRUE NA
## # ℹ 61 more rows
## # ℹ 1 more variable: UFboot <dbl>
p <- plot_phylotree(negev_tree, col=col, shape=shape,
labels=c(famvec, "NODE"),
plainlabels = c("unclassified", "Belgian mosquitoes"), align=F,
group="Genus")+
#geom_nodepoint(aes(subset= node %in% negevcladedf$node), size=1, shape=18)+
scale_y_reverse()+
xlim(-.05,3.5)
negevplot <- addSmallLegend(p, pointSize = 2.5, textSize = 5, spaceLegend = .5)+
theme(legend.text = element_text(vjust = .4),
legend.position = c(0.2,.2))
negevplotggsave(filename = here(phylo_path, "Negevirus/negeviruses.pdf"), negevplot, dpi=1200, width=6.875)
negevplot_minimal <- plot_minimal_phylotree(negev_tree, align=T)+
scale_y_reverse()+
xlim(-.05,5)
negevplot_minimalggsave(filename = here(phylo_path, "Negevirus/negeviruses_minimal.pdf"), negevplot_minimal, dpi=1200, width=6.875)# p1 <- ggarrange(endornaplot, orthoplot,
# bunyaplot, picornaplot,
# ghabriplot, reoplot,
# ncol=2, nrow=3, labels="AUTO",
# heights = c(.2,.4,.4))
# p1 <- ggarrange(endornaplot, orthoplot,
# bunyaplot, picornaplot,
# ghabriplot, reoplot,
# tombusplot, negevplot,
# ncol=2, nrow=4, labels="AUTO",
# heights = c(.15,.25,.25,.25))
# p1
p1 <- ggarrange(negevplot, endornaplot, orthoplot,
picornaplot, bunyaplot, ghabriplot,
reoplot, tombusplot, nodaplot,
ncol=3, nrow=3, labels="AUTO",
heights = c(.25, .37, .37),
font.label = list(size=10))
#p1# ggarrange(p1,
# ggarrange(NULL, tombusplot+theme(legend.position = "right"), NULL,
# widths=c(0.1,0.8,0.1), labels=c("","G",""), ncol=3),
# ncol=1, heights = c(.75, .25))
# p1 <- ggarrange(p1,
# ggarrange(reoplot, tombusplot, nodaplot,#+theme(legend.position = "right"),
# labels=c("G","H", "I"), ncol=3),
# ncol=1, heights = c(.6, .4)) #0.6, 0.4, widths=c(0.4,0.4, 0.2)
#ggsave("figures/treeplots.pdf", width=17, height = 28, units = "in", dpi=300)
ggsave("figures/treeplots.pdf", width=20, height = 20, units = "in", dpi=600)
ggsave("figures/treeplots_small.pdf", width=6.875, height = 9.0625, units = "in", dpi=1200)Minimal phylogenetic trees:
# p2 <- ggarrange(endornaplot_minimal, orthoplot_minimal,
# bunyaplot_minimal, picornaplot_minimal,
# ghabriplot_minimal, reoplot_minimal,
# ncol=2, nrow=3, labels="AUTO",
# heights = c(.2,.4,.4))
p2 <- ggarrange(endornaplot_minimal, orthoplot_minimal, negevplot_minimal,
bunyaplot_minimal, picornaplot_minimal, ghabriplot_minimal,
ncol=3, nrow=2, labels="AUTO",
heights = c(.4,.6))# ggarrange(p2,
# ggarrange(NULL, tombusplot_minimal, NULL,
# widths=c(0.1,0.8,0.1), labels=c("","G",""), ncol=3),
# ncol=1, heights = c(.75, .25))
#ggarrange(p2,
# ggarrange(reoplot_minimal, tombusplot_minimal,
# widths=c(0.5,0.5), labels=c("G","H"), ncol=2),
# ncol=1, heights = c(.7, .3))
p2 <- ggarrange(negevplot_minimal, endornaplot_minimal, orthoplot_minimal,
picornaplot_minimal, bunyaplot_minimal, ghabriplot_minimal,
reoplot_minimal, tombusplot_minimal, nodaplot_minimal,
ncol=3, nrow=3, labels="AUTO",
heights = c(.25, .37, .37),
font.label = list(size=10))
#ggsave("figures/treeplots_minimal.pdf", width=17, height = 28, units = "in", dpi=300)
ggsave("figures/treeplots_minimal.pdf", width=20, height = 20, units = "in", dpi=600)
ggsave(plot=p2,filename="figures/treeplots_minimal_small.pdf", width=6.875, height = 9.0625, units = "in", dpi=600)#MEMO_wolb <- read.table("data/MEMO050.bowtie.depth", header = T)
MEMO_wolb <- read.table("data/MEMO043.bowtie.depth", header = T)
phageblast <- read.table("data/phage_blast.tsv", header=F, sep="\t")
phageblast <- phageblast[phageblast$V4 > 2000,]
rRNA <- data.frame(name=c("16S", "23S", "5S"),
start=c(1136001, 1235938, 1238768),
end=c(1137446, 1238682, 1238878)
)
prophage <- data.frame(
name=c("p1", "p2", "p3", "p4", "p5"),
start=c(250652, 320006, 346157, 444716, 1376656),
end=c(266846, 337904, 360657, 483289, 1413160)
)
covplot <- ggplot(MEMO_wolb, aes_string(x = "pos", y=names(MEMO_wolb[3])))+
geom_line()+
geom_rect(data = phageblast, inherit.aes=FALSE, aes(xmin=V9, xmax=V10, ymin=-max(MEMO_wolb[3])*0.066, ymax=-max(MEMO_wolb[3])*0.043,
fill="#3977AF", color="#3977AF"), alpha=1)+
geom_rect(data = rRNA, inherit.aes=FALSE, aes(xmin=start, xmax=end, ymin=-max(MEMO_wolb[3])*0.036, ymax=-max(MEMO_wolb[3])*0.012,
fill="red", color="red"), alpha=1)+
geom_rect(data = prophage, inherit.aes=FALSE, aes(xmin=start, xmax=end, ymin=-max(MEMO_wolb[3])*0.036, ymax=-max(MEMO_wolb[3])*0.012,
fill="limegreen", color="limegreen"), alpha=1)+
labs(x="Nucleotide position", y="Read depth")+
#ggtitle("Wolbachia coverage plot of sample MEMO050")+
scale_fill_identity(name = "",
breaks = c("#3977AF", "limegreen", "red"),
labels = c("phageWO prophage region (BLASTn)", "prophage regions (Genbank)", "rRNA genes"),
guide = "legend")+
scale_color_identity(name = "",
#breaks = c("#3977AF", "red"),
#labels = c("phageWO prophage region", "rRNA genes"),
guide = "none",#"legend"
)+
theme_bw()+
theme(legend.position = c(.3,.93), #"top"
legend.background=element_rect(fill=NA, color=NA),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit='cm'),
legend.key.size = unit(3, 'mm'),
plot.title = element_text(size=9, face="bold"),
#panel.grid.major = element_line(size = .25),
panel.grid=element_blank())+
guides(fill = guide_legend(override.aes = list(alpha = 1)))
covplotggsave("figures/wolbachia_coverageplot_MEMO043.pdf", dpi=300, width = 7.5, height=5)library(ggbeeswarm)
#phage <- read.table("data/wolbachia_phage_ratio.tsv", sep="\t", header = T)
#phage <- read.table("data/wolbachia_phage_bowtieratio_no_rRNA.tsv", sep="\t", header = T)
phage <- read.table("data/wolbachia_prophage_gb_bowtieratio_no_rRNA.tsv", sep="\t", header = T)
phage <- merge(phage, meta, by="Sample")
phage <- phage %>%
mutate(ratio=case_when(ratio==NA ~ 0,
nonphage<1 ~ 0,
TRUE ~ ratio))
phage %>%
filter(ratio>0) %>%
summarise(median(ratio))swarmplot <- ggplot(phage, aes(x = SKA_Subspecies, y= ratio, color=ifelse(ratio==0|nonphage<1, NA, ratio>3.5)))+
geom_quasirandom(width = .4)+
scale_color_manual(values = c("FALSE"="#3977AF", "TRUE"="darkorange", "NA"="#7F7F7F"),
name="",
labels=c("Prophage", "Real phage particle?", "Average depth < 1"))+
geom_text(aes(label=ifelse(ratio > 3.5&nonphage>1, Sample,"")), nudge_x=.5, size=3,
show.legend = F)+
ylab(paste0("<span style='font-size: 11pt'>Depth ratio</span><br><span style='font-size: 8pt'>*(average depth phage/average depth nonphage)*</span>"))+
theme_bw()+
theme(axis.title.x = element_blank(),
axis.text.x = element_text(face="italic", size = 6),
legend.position = c(.2, .93),
legend.background = element_blank(),
legend.key.size = unit(3, 'mm'),
panel.grid = element_blank(),
axis.title.y = ggtext::element_markdown())
swarmplotWolbachia bwa-mem2 mapped
wMel <- read.table("data/wMel.txt", header=F, dec=".", sep=" ")
wPip <- read.table("data/wPip.txt", header=F, dec=".", sep=" ")
phageWO <- read.table("data/phageWO.txt", header=F, dec=".", sep=" ")
colnames(wMel) <- c("Sample", "numreads", "covbases", "coverage", "meandepth")
colnames(wPip) <- c("Sample", "numreads", "covbases", "coverage", "meandepth")
colnames(phageWO) <- c("Sample", "supergroup", "numreads", "covbases", "coverage", "meandepth")
wPip$supergroup <- "wPip"
wMel$supergroup <- "wMel"
wolb <- rbind(wMel, wPip, phageWO)
wolb %<>%
plyr::mutate(supergroup=str_replace(string=supergroup, pattern = "NODE_12_length_11674_cov_94.907217_MEMO129", replacement = "phageWO2")) %>%
plyr::mutate(supergroup=str_replace(string=supergroup, pattern = "NODE_1_length_31159_cov_25.624799_MEMO050", replacement = "phageWO1"))
wolb$numreads[wolb$coverage<5] <- 0
wolb$meandepth[wolb$coverage<5] <- 0
left_join(wolb, meta) %>%
filter(supergroup=="wPip") %>%
group_by(SKA_Subspecies) %>%
summarise(n=n(), wol_pos=sum(numreads>0)) %>%
mutate(wol_freq=round(wol_pos/n*100, 3))wviolin <- pivot_wider(wolb, id_cols = "Sample", names_from = "supergroup", values_from = "numreads") %>%
ggplot(aes(x = meta[meta$Sample!="NEMO39",]$SKA_Subspecies, y=wPip))+
geom_boxplot(aes(color=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), width=0.1, outlier.shape = NA)+
geom_violin(aes(fill=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), color=NA, show.legend = F, trim = T)+
geom_jitter(aes(color=meta[meta$Sample!="NEMO39",]$SKA_Subspecies), size=1, alpha=0.6,
position=position_jitter(w=0.1,h=0)) +
theme_bw()+
theme(axis.text.x = element_blank(),
panel.grid = element_blank(),
axis.ticks.x = element_blank())+
xlab(label = "")+
ylab(label = "Wolbachia read count")+
scale_fill_manual(values=alpha(myColors, 0.1), name='')+
scale_color_viridis_d(begin=0.4, end =0.9, name="")+
guides(fill = guide_legend(#override.aes = list(alpha = 1),
label.theme = element_text(size=8, angle = 0, face = "italic")))+
scale_y_continuous(trans=scales::pseudo_log_trans(base = 10), breaks = c(0, 10^seq(2,8,2)))
wviolinggsave("figures/wolbachia_readcount.pdf", dpi=300)
ggarrange(ggarrange(covplot+theme(legend.key.size = unit(3, 'mm')), swarmplot,
ncol=1, labels = "AUTO", align="v"),
wviolin+
theme(legend.position = c(.15, .92),
legend.background = element_blank()),
labels = c("", "C"), widths = c(1, 1.25), align = "v")ggsave("figures/combined_wolbachia.pdf", dpi=300, width = 12)
p1<-ggarrange(covplot+theme(legend.key.size = unit(3, 'mm')), swarmplot,
ncol=1, labels = "AUTO")
p2 <- wviolin+
theme(legend.position = c(.15,.93),
legend.background = element_blank(),
panel.background = element_blank())
p2library(patchwork)
#patch <- covplot + swarmplot + plot_spacer() + p2 + plot_layout(widths = c(1, .01, 1.5),
# design = "
# 134
# 234")
#patch + plot_annotation(tag_levels = "A") &
# theme(plot.tag = element_text(face = "bold"))
patch <- p2 + plot_spacer() + covplot + swarmplot + plot_layout(widths = c(1.5, .01, 1),
design = "
123
124")
patch + plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(face = "bold"))ggsave("figures/combined_wolbachia.pdf", dpi=300, width = 12)Second layout
patch2 <- covplot + swarmplot + plot_spacer() + p2 + plot_layout(widths = c(1, .01, 1.5),
design = "
134
234")
patch2 + plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(face = "bold"))ggsave("figures/combined_wolbachia_2.pdf", dpi=300, width = 12)barplot
wb <- pivot_wider(wolb, id_cols = "Sample", names_from = "supergroup", values_from = "numreads") %>%
ggplot(aes(x = Sample, y=wPip))+
geom_col(aes(fill=meta[meta$Sample!="NEMO39",]$SKA_Subspecies))+
theme_bw()+
theme(axis.text.x = element_blank(),
panel.grid.major = element_blank())+
scale_y_continuous(expand=expansion(mult = c(0, 0.01), add = c(0, 0.1)))+
xlab(label = "Sample")+
ylab(label = "Wolbachia read count")+
labs(shape="Virus", color="Virus")+
scale_fill_viridis_d(begin=0.4, end =0.9, name="")+
guides(fill = guide_legend(override.aes = list(alpha = 1),
label.theme = element_text(size=10, angle = 0, face = "italic")))+
facet_grid(~meta[meta$Sample!="NEMO39",]$Municipality, scales = "free", space = "free")
wbphageotu<-read.table("data/BEmosq_classification-85-taxfile-1000nt.tsv", sep="\t", header = T)
phage<-phageotu[phageotu$Phylum == "Phage",]
rownames(phage)<- phage$Contig
phage <- subset(phage, select=-Contig)
newtax<-tax[!rownames(tax) %in% rownames(phage),]
newtax <- rbind(newtax, phage)
tax_table(BMV) <- as.matrix(newtax) #Maybe better solution
#newtax.UF <- tax_table(as.matrix(newtax))
#BMV <- phyloseq(OTU.UF, newtax.UF, meta.UF)
BMV.V <- subset_taxa(BMV, Kingdom=="Viruses")
phageome <- subset_taxa(BMV.V, Phylum=="Phage" | Order=="Caudovirales" | Family=="Inoviridae")
phageome## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 41 taxa and 197 samples ]
## sample_data() Sample Data: [ 197 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 41 taxa by 7 taxonomic ranks ]
otu_table(phageome) <- as.data.frame(otu_table(phageome)) %>%
mutate(otu=rownames(.)) %>%
pivot_longer(cols=1:197, names_to = "Sample", values_to = "abundance") %>%
mutate(abundance=if_else(otu %in% c("NODE_1_length_31159_cov_25.624799_MEMO050|full",
"NODE_12_length_11674_cov_94.907217_MEMO129|full")
& Sample != "NEMO039", 0, as.double(abundance))) %>%
pivot_wider(names_from = Sample, values_from = abundance) %>%
column_to_rownames('otu') %>%
otu_table(taxa_are_rows = T)
#sample_data(phageome) <- data.frame(wv.meta, row.names = 1)
#sample_data(phageome)
phage_pruned <- prune_samples(sample_sums(phageome)>0, phageome)
phage_pruned <- prune_taxa(taxa_sums(phage_pruned)>0, phage_pruned)
phage_pruned## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 28 taxa and 4 samples ]
## sample_data() Sample Data: [ 4 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 28 taxa by 7 taxonomic ranks ]
pp <- as.data.frame(otu_table(phage_pruned))
p.merge <- merge_taxa(phage_pruned, 1:2, 2)
p.merge <- merge_taxa(p.merge, 2:24, 2)
p.merge <- merge_taxa(p.merge, 3:4, 2)
tax_table(p.merge)## Taxonomy Table: [4 taxa by 7 taxonomic ranks]:
## Kingdom Phylum
## NODE_4_length_5754_cov_10.923727_MEMO048|full "Viruses" NA
## NODE_2_length_9043_cov_21.187821_MEMO141|full "Viruses" NA
## NODE_17_length_1194_cov_3.745748_MEMO017|full "Viruses" "Uroviricota"
## NODE_1_length_5000_cov_44.805403_NEMO08|full "Viruses" "Phage"
## Class Order
## NODE_4_length_5754_cov_10.923727_MEMO048|full NA NA
## NODE_2_length_9043_cov_21.187821_MEMO141|full NA NA
## NODE_17_length_1194_cov_3.745748_MEMO017|full "Caudoviricetes" "Caudovirales"
## NODE_1_length_5000_cov_44.805403_NEMO08|full "unclassified" "unclassified"
## Family Genus
## NODE_4_length_5754_cov_10.923727_MEMO048|full NA NA
## NODE_2_length_9043_cov_21.187821_MEMO141|full NA NA
## NODE_17_length_1194_cov_3.745748_MEMO017|full "Siphoviridae" "unclassified"
## NODE_1_length_5000_cov_44.805403_NEMO08|full "unclassified" "unclassified"
## Species
## NODE_4_length_5754_cov_10.923727_MEMO048|full NA
## NODE_2_length_9043_cov_21.187821_MEMO141|full NA
## NODE_17_length_1194_cov_3.745748_MEMO017|full "Erwinia phage Midgardsormr38"
## NODE_1_length_5000_cov_44.805403_NEMO08|full "unclassified"
otu_table(p.merge)## OTU Table: [4 taxa and 4 samples]
## taxa are rows
## MEMO017 MEMO048 MEMO141 NEMO08
## NODE_4_length_5754_cov_10.923727_MEMO048|full 0 3945 0 0
## NODE_2_length_9043_cov_21.187821_MEMO141|full 0 0 17529 0
## NODE_17_length_1194_cov_3.745748_MEMO017|full 561 0 0 0
## NODE_1_length_5000_cov_44.805403_NEMO08|full 0 0 0 7751
plot_heatmap(p.merge, taxa.label = "Species", low ="#FFF7B8", high = "#800026", na.value = "#FFFFCC")+
theme_bw()+
theme(axis.title = element_blank())phage_tax <- as.data.frame(tax_table(p.merge))
phage_tax <- replace_na(phage_tax, list(Phylum="Uroviricota", Class="Caudoviricetes", Order="Caudovirales",
Family="unclassified", Genus="unclassified", Species="unclassified"))
tax_table(p.merge) <- as.matrix(phage_tax)viral_reads <- as.data.frame(colSums(otu_table(BMV.V2)))
viral_reads <- rownames_to_column(viral_reads, "Sample")
colnames(viral_reads) <- c("Sample", "viral_reads")
wol_virus_df <- left_join(wPip, viral_reads)
fit<-lm(log10(viral_reads+1)~coverage, data=wol_virus_df)
summary(fit)##
## Call:
## lm(formula = log10(viral_reads + 1) ~ coverage, data = wol_virus_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.788 -1.684 -1.542 1.948 4.718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.788536 0.244070 7.328 6.05e-12 ***
## coverage -0.003115 0.004725 -0.659 0.51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.117 on 195 degrees of freedom
## Multiple R-squared: 0.002225, Adjusted R-squared: -0.002892
## F-statistic: 0.4348 on 1 and 195 DF, p-value: 0.5104
wol_virus_df %>%
#mutate(viral_reads=viral_reads+1) %>%
ggplot(aes(x=coverage, y=viral_reads))+
geom_point()+
geom_smooth(method=lm)+
geom_vline(xintercept = 5, linetype="dashed",
color = "red")+
#scale_x_continuous(trans='log10')+
scale_y_continuous(trans=scales::pseudo_log_trans(base = 10), breaks = c(0, 10^seq(2,8,2)))+
#scale_y_continuous(trans='log10')+
ylab("log10(viral reads+1)")+
xlab("Wolbachia wPip genome coverage")+
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))+
theme_bw()ggsave("figures/wolbachia_virus_correlation.pdf", dpi = 300)
left_join(wPip, viral_reads) %>%
ggplot(aes(x=coverage>5, y=viral_reads))+
scale_y_continuous(trans=scales::pseudo_log_trans(base = 10), breaks = c(0, 10^seq(2,8,2)))+
geom_boxplot()+
geom_point()left_join(wPip, rownames_to_column(vcount, "Sample")) %>%
ggplot(aes(x=coverage, y=viral_count))+
#geom_boxplot()+
geom_point()+
geom_smooth(method = lm)###pWCP coverage
pWCP_cov <- read.table("data/pWCP_stats.tsv", header=T)
#names(pWCP_cov) <- c("Sample", "pWCP_cov")
left_join(wPip, pWCP_cov, by="Sample") %>%
left_join(meta[meta$Sample!="NEMO39",]) %>%
mutate(meandepth.x=case_when(coverage.x<5 ~ 0,
TRUE ~ meandepth.x),
meandepth.y=case_when(coverage.x<5 ~ 0,
TRUE ~ meandepth.y),
completeness=cut(pWCP_cov$coverage, breaks = c(0, 25, 50, 75, 100), include.lowest = T)) %>%
ggplot(aes(x=meandepth.x, y=meandepth.y))+
geom_smooth(method = "lm", se=F, col="lightgrey")+
geom_point(aes(col=SKA_Subspecies, shape=completeness))+
scale_color_manual(values = myColors)+
scale_shape_manual(values=c(3, 17, 15, 19))+
scale_y_continuous(trans = pseudo_log_trans())+
scale_x_continuous(trans = pseudo_log_trans())+
labs(color="Mosquito species",
x="Wolbachia sequencing depth",
y="pWCP plasmid sequencing depth")+
theme_bw()+
guides(col = guide_legend(override.aes = list(shape = 15, size = 5),
label.theme = element_text(size=10, face="italic")))ggsave("figures/pWCP_coverage.pdf", dpi=300)
left_join(wPip, pWCP_cov, by="Sample") %>%
left_join(meta[meta$Sample!="NEMO39",]) %>%
mutate(meandepth.x=case_when(coverage.x<5 ~ 0,
TRUE ~ meandepth.x),
meandepth.y=case_when(coverage.x<5 ~ 0,
TRUE ~ meandepth.y),
completeness=cut(pWCP_cov$coverage, breaks = c(0, 25, 50, 75, 100), include.lowest = T)) %>%
ggplot(aes(x=meandepth.x, y=meandepth.y))+
geom_smooth(method = "lm", se=F, col="lightgrey")+
geom_point(aes(col=coverage.y, shape=SKA_Subspecies))+
scale_color_gradientn(colours = viridis(10), guide = guide_colourbar(direction = "horizontal", title.position="top"))+
scale_y_continuous(trans = pseudo_log_trans())+
scale_x_continuous(trans = pseudo_log_trans())+
labs(color="pWCP completeness (%)",
shape="Mosquito species",
x="Wolbachia sequencing depth",
y="pWCP plasmid sequencing depth")+
theme_bw()#ggsave("figures/pWCP_coverage.pdf", dpi=300)